from lxml import etree
# Sample HTML document used as the parsing fixture for the lxml examples in
# this script: one <ul> of links followed by two ordered lists.
# NOTE: the ids li1..li5 appear both in the <ul> and in <ol id="ol1">, so an
# id-based query such as //li[@id="li1"] matches two elements.
text = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <title>Title</title>
</head>
<body>
<ul>
    <li id="li1"><a href="https://www.baidu.com">百度</a></li>
    <li id="li2"><a href="https://www.jd.com">京东</a></li>
    <li id="li3"><a href="https://www.taobao.com">淘宝</a></li>
    <li id="li4"><a href="https://www.aliyun.com">阿里云</a></li>
    <li id="li5"><a href="https://www.douyin.com">抖音</a></li>
</ul>
<ol id="ol1">
    <li id="li1">li11</li>
    <li id="li2">li22</li>
    <li id="li3">li33</li>
    <li id="li4">li44</li>
    <li id="li5">li55</li>
</ol>
<ol id="ol2">
    <li id="li11">li111</li>
    <li id="li22">li222</li>
    <li id="li33">li333</li>
    <li id="li44">li444</li>
    <li id="li55">li555</li>
</ol>
</body>
</html>
'''


# Parse the sample markup with lxml's HTML parser; `tree` is the element
# returned by the parse and is what the selector examples query.
tree = etree.fromstring(text, parser=etree.HTMLParser())
# print(dir(tree))
# print(tree.tag)
# data  = tree.findall('body/ul/li')
# for item in data:
#     print(item.text)

# xml_tree = etree.XML(text)
# print(xml_tree)
# data1  = tree.findall('.//li')
# for item in data1:
#     print(item.text, item.attrib)

# tree = etree.fromstring(text,parser=etree.XMLParser())




# lxml selectors

# cssselect
# data = tree.cssselect('ol li')
# for item in data:
#     print(item.text)
# print('===================')
# data1 = tree.cssselect('ul li')
# for item in data1:
#     a = item.cssselect('a')
#     print(a[0].text, a[0].attrib, a[0].tag)

# xpath
# data = tree.xpath('//li[@id="li1"]')
# for item in data:
#     print(item.text)

# Print the text of every <a> that is a direct child of an <li> and has at
# least one text node. Despite its name, `lis` holds the <a> elements.
lis = tree.xpath('//li/a[text()]')
for link in lis:
    print(link.text)

# Print the items of the element whose id is "ol1".
# The <ol>'s own .text is only the whitespace that precedes its first child,
# so the item contents have to be read from its <li> children instead.
# xpath() returns a list, so the follow-up query must run on each matched
# element (not on `ols` itself), and the relative './li' path keeps it scoped
# to that element -- a leading '//' would search the whole document again.
ols = tree.xpath('//*[@id="ol1"]')
for ol in ols:
    for item in ol.xpath('./li'):
        print(item.text)